import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
import contextily as cx
import rtree
from zlib import crc32
import hashlib
from shapely.geometry import Point, LineString, Polygon
import numpy as np
from scipy.spatial import cKDTree
from shapely.geometry import Point
from haversine import Unit
from geopy.distance import distance
Imports
Filtering
## Importing our DataFrames
gisfilepath = "/Users/jnapolitano/Projects/data/energy/non-active-wells.geojson"
wells_df = gpd.read_file(gisfilepath)
wells_df = wells_df.to_crs(epsg=3857)
## Importing our DataFrames
gisfilepath = "/Users/jnapolitano/Projects/data/energy/Liquified_Natural_Gas_Import_Exports_and_Terminals.geojson"
terminal_df = gpd.read_file(gisfilepath)
terminal_df = terminal_df.to_crs(epsg=3857)
terminal_df.drop(terminal_df[terminal_df['STATUS'] == 'SUSPENDED'].index, inplace = True)
terminal_df.rename(columns={"NAME": "TERMINAL_NAME"})
terminal_df['TERMINAL_GEO'] = terminal_df['geometry'].copy()
terminal_df.columns
Index(['OBJECTID', 'TERMID', 'NAME', 'ADDRESS', 'CITY', 'STATE', 'ZIP', 'ZIP4',
'TELEPHONE', 'TYPE', 'STATUS', 'POPULATION', 'COUNTY', 'COUNTYFIPS',
'COUNTRY', 'LATITUDE', 'LONGITUDE', 'NAICS_CODE', 'NAICS_DESC',
'SOURCE', 'SOURCEDATE', 'VAL_METHOD', 'VAL_DATE', 'WEBSITE', 'EPA_ID',
'ALT_NAME', 'OWNER', 'POSREL', 'JRSDCTN', 'CONTYPE', 'IE_PORT',
'BERTHS', 'STORAGE', 'STORCAP', 'CURRENTCAP', 'APPCAP', 'OPYEAR',
'IMPEXPCTRY', 'VOLUME', 'PRICE', 'geometry', 'TERMINAL_GEO'],
dtype='object')
terminal_map =terminal_df.explore(
column="TYPE", # make choropleth based on "PORT_NAME" column
popup=True, # show all values in popup (on click)
tiles="Stamen Terrain", # use "CartoDB positron" tiles
cmap='Set1', # use "Set1" matplotlib colormap
#style_kwds=dict(color="black"),
marker_kwds= dict(radius=6),
#tooltip=['NAICS_DESC','REGION', 'COMMODITY' ],
legend =True, # use black outline)
categorical=True,
)
terminal_map
According to the data there is not an export nor import location on The Western Side of the United States.
East Asian import or carbon capture export demands could justfity port development. Another study must be conducted.
This method does not accuraletly calculate distance. The algorith used below calculates distance on a euclidan plane. In order to calculate a correct answer we must account for sphericity.
I include the code below as reference and a learning opportunity
def ckdnearest(gdA, gdB):
nA = np.array(list(gdA.geometry.apply(lambda x: (x.x, x.y))))
nB = np.array(list(gdB.geometry.apply(lambda x: (x.x, x.y))))
btree = cKDTree(nB)
dist, idx = btree.query(nA, k=1)
gdB_nearest = gdB.iloc[idx].drop(columns="geometry").reset_index(drop=True)
gdf = pd.concat(
[
gdA.reset_index(drop=True),
gdB_nearest,
pd.Series(dist, name='dist')
],
axis=1)
return gdf
c = ckdnearest(wells_df, terminal_df)
c.describe()
| level_0 | index | OBJECTID | LATITUDE | LONGITUDE | PERMITNO | OPERATORID | SURF_LAT | SURF_LONG | BOT_LAT | ... | LATITUDE | LONGITUDE | BERTHS | STORAGE | STORCAP | CURRENTCAP | APPCAP | VOLUME | PRICE | dist | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 | 1.552070e+05 | 155207.000000 | 155207.000000 | 155207.000000 | ... | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 |
| mean | 77603.000000 | 330141.223038 | 330142.223038 | 37.388915 | -88.688821 | 2.031513e+06 | 6.847616e+07 | -66.440744 | -179.323395 | -982.734015 | ... | 34.272492 | -86.967254 | -33.938179 | -30.905661 | -129.092163 | 1.649174 | -937.566498 | -134.892939 | -135.078039 | 7.509743e+05 |
| std | 44804.545952 | 232346.189714 | 232346.189714 | 4.392454 | 8.573390 | 3.251915e+06 | 2.567834e+08 | 311.099627 | 273.538141 | 129.138204 | ... | 3.939841 | 12.573387 | 185.950962 | 186.546056 | 350.481192 | 0.519726 | 240.231025 | 348.246757 | 348.084448 | 6.088721e+05 |
| min | 0.000000 | 0.000000 | 1.000000 | 28.899560 | -123.342380 | -9.990000e+02 | -9.990000e+02 | -999.000000 | -999.000000 | -999.000000 | ... | 27.889869 | -116.847415 | -999.000000 | -999.000000 | -999.000000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 5.272348e+02 |
| 25% | 38801.500000 | 134028.500000 | 134029.500000 | 32.871595 | -93.874935 | -9.990000e+02 | -9.990000e+02 | 32.028420 | -93.939920 | -999.000000 | ... | 30.112960 | -93.288224 | 2.000000 | 2.000000 | 7.500000 | 1.500000 | -999.000000 | 0.000000 | 0.000000 | 3.683350e+05 |
| 50% | 77603.000000 | 310196.000000 | 310197.000000 | 38.396300 | -87.754143 | -9.990000e+02 | -9.990000e+02 | 38.189080 | -88.392278 | -999.000000 | ... | 31.988522 | -88.503111 | 2.000000 | 4.000000 | 11.000000 | 1.800000 | -999.000000 | 0.000000 | 0.000000 | 5.383780e+05 |
| 75% | 116404.500000 | 348997.500000 | 348998.500000 | 39.222220 | -81.201900 | 3.502644e+06 | -9.990000e+02 | 39.205850 | -81.203860 | -999.000000 | ... | 38.389603 | -76.409092 | 2.000000 | 7.000000 | 14.800000 | 1.800000 | -999.000000 | 9.100000 | 9.370000 | 9.919574e+05 |
| max | 155206.000000 | 969099.000000 | 969100.000000 | 50.744220 | -75.593800 | 2.017004e+07 | 1.044775e+09 | 50.744220 | -75.593800 | 45.179110 | ... | 42.392856 | -71.058151 | 2.000000 | 7.000000 | 18.000000 | 4.000000 | 4.000000 | 932.000000 | 11.340000 | 3.191132e+06 |
8 rows × 25 columns
nearest_wells_df= wells_df.sjoin_nearest(terminal_df, distance_col="distance_euclidian")
nearest_wells_df.describe()
| level_0 | index | OBJECTID_left | LATITUDE_left | LONGITUDE_left | PERMITNO | OPERATORID | SURF_LAT | SURF_LONG | BOT_LAT | ... | LATITUDE_right | LONGITUDE_right | BERTHS | STORAGE | STORCAP | CURRENTCAP | APPCAP | VOLUME | PRICE | distances | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 | 1.552070e+05 | 155207.000000 | 155207.000000 | 155207.000000 | ... | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 |
| mean | 77603.000000 | 330141.223038 | 330142.223038 | 37.388915 | -88.688821 | 2.031513e+06 | 6.847616e+07 | -66.440744 | -179.323395 | -982.734015 | ... | 34.272492 | -86.967254 | -33.938179 | -30.905661 | -129.092163 | 1.649174 | -937.566498 | -134.892939 | -135.078039 | 7.509743e+05 |
| std | 44804.545952 | 232346.189714 | 232346.189714 | 4.392454 | 8.573390 | 3.251915e+06 | 2.567834e+08 | 311.099627 | 273.538141 | 129.138204 | ... | 3.939841 | 12.573387 | 185.950962 | 186.546056 | 350.481192 | 0.519726 | 240.231025 | 348.246757 | 348.084448 | 6.088721e+05 |
| min | 0.000000 | 0.000000 | 1.000000 | 28.899560 | -123.342380 | -9.990000e+02 | -9.990000e+02 | -999.000000 | -999.000000 | -999.000000 | ... | 27.889869 | -116.847415 | -999.000000 | -999.000000 | -999.000000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 5.272348e+02 |
| 25% | 38801.500000 | 134028.500000 | 134029.500000 | 32.871595 | -93.874935 | -9.990000e+02 | -9.990000e+02 | 32.028420 | -93.939920 | -999.000000 | ... | 30.112960 | -93.288224 | 2.000000 | 2.000000 | 7.500000 | 1.500000 | -999.000000 | 0.000000 | 0.000000 | 3.683350e+05 |
| 50% | 77603.000000 | 310196.000000 | 310197.000000 | 38.396300 | -87.754143 | -9.990000e+02 | -9.990000e+02 | 38.189080 | -88.392278 | -999.000000 | ... | 31.988522 | -88.503111 | 2.000000 | 4.000000 | 11.000000 | 1.800000 | -999.000000 | 0.000000 | 0.000000 | 5.383780e+05 |
| 75% | 116404.500000 | 348997.500000 | 348998.500000 | 39.222220 | -81.201900 | 3.502644e+06 | -9.990000e+02 | 39.205850 | -81.203860 | -999.000000 | ... | 38.389603 | -76.409092 | 2.000000 | 7.000000 | 14.800000 | 1.800000 | -999.000000 | 9.100000 | 9.370000 | 9.919574e+05 |
| max | 155206.000000 | 969099.000000 | 969100.000000 | 50.744220 | -75.593800 | 2.017004e+07 | 1.044775e+09 | 50.744220 | -75.593800 | 45.179110 | ... | 42.392856 | -71.058151 | 2.000000 | 7.000000 | 18.000000 | 4.000000 | 4.000000 | 932.000000 | 11.340000 | 3.191132e+06 |
8 rows × 26 columns
#df.geopy.distance.distance(coords_1, coords_2).km
#df.apply(lambda row: distance(row['point'], row['point_next']).km if row['point_next'] is not None else float('nan'), axis=1)
# Thanks to https://stackoverflow.com/questions/55909305/using-geopy-in-a-dataframe-to-get-distances
nearest_wells_df['true_distance_km'] = nearest_wells_df.apply(lambda row: distance((row['LATITUDE_left'], row['LONGITUDE_left']), (row['LATITUDE_right'], row['LONGITUDE_right'])).km if row['geometry'] is not None else float('nan'), axis=1)
nearest_wells_df.describe()
| level_0 | index | OBJECTID_left | LATITUDE_left | LONGITUDE_left | PERMITNO | OPERATORID | SURF_LAT | SURF_LONG | BOT_LAT | ... | BERTHS | STORAGE | STORCAP | CURRENTCAP | APPCAP | VOLUME | PRICE | distances | true_distance | true_distance_km | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 | 1.552070e+05 | 155207.000000 | 155207.000000 | 155207.000000 | ... | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 155207.000000 | 1.552070e+05 | 155207.000000 | 155207.000000 |
| mean | 77603.000000 | 330141.223038 | 330142.223038 | 37.388915 | -88.688821 | 2.031513e+06 | 6.847616e+07 | -66.440744 | -179.323395 | -982.734015 | ... | -33.938179 | -30.905661 | -129.092163 | 1.649174 | -937.566498 | -134.892939 | -135.078039 | 7.509743e+05 | 597.548375 | 597.548375 |
| std | 44804.545952 | 232346.189714 | 232346.189714 | 4.392454 | 8.573390 | 3.251915e+06 | 2.567834e+08 | 311.099627 | 273.538141 | 129.138204 | ... | 185.950962 | 186.546056 | 350.481192 | 0.519726 | 240.231025 | 348.246757 | 348.084448 | 6.088721e+05 | 471.841586 | 471.841586 |
| min | 0.000000 | 0.000000 | 1.000000 | 28.899560 | -123.342380 | -9.990000e+02 | -9.990000e+02 | -999.000000 | -999.000000 | -999.000000 | ... | -999.000000 | -999.000000 | -999.000000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 5.272348e+02 | 0.454603 | 0.454603 |
| 25% | 38801.500000 | 134028.500000 | 134029.500000 | 32.871595 | -93.874935 | -9.990000e+02 | -9.990000e+02 | 32.028420 | -93.939920 | -999.000000 | ... | 2.000000 | 2.000000 | 7.500000 | 1.500000 | -999.000000 | 0.000000 | 0.000000 | 3.683350e+05 | 312.526461 | 312.526461 |
| 50% | 77603.000000 | 310196.000000 | 310197.000000 | 38.396300 | -87.754143 | -9.990000e+02 | -9.990000e+02 | 38.189080 | -88.392278 | -999.000000 | ... | 2.000000 | 4.000000 | 11.000000 | 1.800000 | -999.000000 | 0.000000 | 0.000000 | 5.383780e+05 | 420.917051 | 420.917051 |
| 75% | 116404.500000 | 348997.500000 | 348998.500000 | 39.222220 | -81.201900 | 3.502644e+06 | -9.990000e+02 | 39.205850 | -81.203860 | -999.000000 | ... | 2.000000 | 7.000000 | 14.800000 | 1.800000 | -999.000000 | 9.100000 | 9.370000 | 9.919574e+05 | 819.680379 | 819.680379 |
| max | 155206.000000 | 969099.000000 | 969100.000000 | 50.744220 | -75.593800 | 2.017004e+07 | 1.044775e+09 | 50.744220 | -75.593800 | 45.179110 | ... | 2.000000 | 7.000000 | 18.000000 | 4.000000 | 4.000000 | 932.000000 | 11.340000 | 3.191132e+06 | 2390.537825 | 2390.537825 |
8 rows × 28 columns
filtered_wells = nearest_wells_df.loc[nearest_wells_df['true_distance_km'] < 50].copy()
filtered_wells.describe()
| level_0 | index | OBJECTID_left | LATITUDE_left | LONGITUDE_left | PERMITNO | OPERATORID | SURF_LAT | SURF_LONG | BOT_LAT | ... | BERTHS | STORAGE | STORCAP | CURRENTCAP | APPCAP | VOLUME | PRICE | distances | true_distance | true_distance_km | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.0 | 2419.0 | 2419.000000 | 2419.000000 | 2419.0 | ... | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 | 2419.000000 |
| mean | 110553.939644 | 56938.078545 | 56939.078545 | 30.099833 | -93.410064 | -999.0 | -999.0 | 29.248880 | -94.163194 | -999.0 | ... | -412.634973 | -411.768913 | -407.436213 | 0.960054 | -331.037859 | -412.265399 | -413.798892 | 24377.116351 | 21.074003 | 21.074003 |
| std | 13845.940949 | 46394.611417 | 46394.611417 | 0.174169 | 0.259045 | 0.0 | 0.0 | 29.585044 | 26.034588 | 0.0 | ... | 493.181504 | 493.910204 | 497.556930 | 1.112064 | 472.272942 | 494.941986 | 492.202588 | 14357.926480 | 12.401262 | 12.401262 |
| min | 45760.000000 | 25.000000 | 26.000000 | 29.680190 | -93.835970 | -999.0 | -999.0 | -999.000000 | -999.000000 | -999.0 | ... | -999.000000 | -999.000000 | -999.000000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 527.234827 | 0.454603 | 0.454603 |
| 25% | 98259.000000 | 9611.500000 | 9612.500000 | 30.007900 | -93.595860 | -999.0 | -999.0 | 30.007660 | -93.596210 | -999.0 | ... | -999.000000 | -999.000000 | -999.000000 | 0.000000 | -999.000000 | -999.000000 | -999.000000 | 12188.335604 | 10.516375 | 10.516375 |
| 50% | 112513.000000 | 49491.000000 | 49492.000000 | 30.038510 | -93.425860 | -999.0 | -999.0 | 30.037810 | -93.426540 | -999.0 | ... | 2.000000 | 3.000000 | 9.000000 | 0.710000 | 1.410000 | 0.000000 | 0.000000 | 27088.275830 | 23.374141 | 23.374141 |
| 75% | 122872.500000 | 100271.500000 | 100272.500000 | 30.252630 | -93.335890 | -999.0 | -999.0 | 30.252615 | -93.336045 | -999.0 | ... | 2.000000 | 3.000000 | 11.000000 | 1.800000 | 4.000000 | 0.000000 | 0.000000 | 32588.563428 | 28.230653 | 28.230653 |
| max | 134215.000000 | 391588.000000 | 391589.000000 | 30.561100 | -88.052220 | -999.0 | -999.0 | 30.561100 | -92.782800 | -999.0 | ... | 2.000000 | 5.000000 | 16.900000 | 4.000000 | 4.000000 | 932.000000 | 4.620000 | 58104.197789 | 49.997185 | 49.997185 |
8 rows × 28 columns
filtered_wells.explore(
column="STATUS_left", # make choropleth based on "PORT_NAME" column
popup=True, # show all values in popup (on click)
tiles="Stamen Terrain", # use "CartoDB positron" tiles
cmap='Set1', # use "Set1" matplotlib colormap
#style_kwds=dict(color="black"),
marker_kwds= dict(radius=6),
#tooltip=['NAICS_DESC','REGION', 'COMMODITY' ],
legend =True, # use black outline)
categorical=True,)